# Dylan Carlson Sirvent León (dylancarlson@g.harvard.edu)
# Downloads every archived workbook of EIA's Preliminary Monthly Electric
# Generator Inventory (based on Form EIA-860M, a supplement to Form EIA-860).

library(tidyverse)
library(readxl)
library(here)

# Build the list of archive URLs to fetch: one workbook per calendar month
# for each year from 2015 through 2024. expand.grid() varies its first
# column fastest, so rows run January..December within each year.
url_grid <- expand.grid(month = tolower(month.name),
                        year = 2015:2024,
                        stringsAsFactors = FALSE)

# Remove empty entries: January through June 2015 are excluded from the
# download list.
is_empty_entry <- url_grid$year == 2015 &
  url_grid$month %in% tolower(month.name[1:6])

file_download_list <- paste0(
  "https://www.eia.gov/electricity/data/eia860m/archive/xls/",
  url_grid$month, "_generator", url_grid$year, ".xlsx"
)[!is_empty_entry]

# Only `file_download_list` is needed from here on.
rm(url_grid, is_empty_entry)

# Folder that receives the downloaded workbooks, built with here().
output_directory <- here("data", "input", "EIA-860M")

# Create the folder (and any missing parents) on first run.
# dir.create() does not raise an error when it fails: it returns FALSE and
# emits a warning. Both the warning and the return value are therefore
# checked so the script stops instead of reporting a false "[OK]".
if (!dir.exists(output_directory)) {
  dir_created <- tryCatch(
    dir.create(output_directory, recursive = TRUE),
    warning = function(w) {
      stop("Failed to create output directory ", output_directory, ": ",
           conditionMessage(w), call. = FALSE)
    },
    error = function(e) {
      stop("Failed to create output directory ", output_directory, ": ",
           conditionMessage(e), call. = FALSE)
    }
  )
  if (!isTRUE(dir_created)) {
    stop("Failed to create output directory ", output_directory,
         call. = FALSE)
  }
  rm(dir_created)
  message("[OK] Created output directory: ", output_directory)
}

# Lookup from lower-case English month name to its zero-padded two-digit
# number ("january" -> "01", ..., "december" -> "12").
month_to_number <- setNames(sprintf("%02d", 1:12), tolower(month.name))

# Download every URL in `file_download_list` into `output_directory`,
# renaming "<month>_generator<year>.xlsx" to "<year>_<mm>_<month>.xlsx".
# Files that already exist locally are skipped, so the script can be re-run
# to pick up only what is missing.
n_failed <- 0L

for (file_url in file_download_list) {

  # Split the remote file name into its month and year parts. The dot is
  # escaped and the patterns are anchored so only ".xlsx" names match.
  source_name <- basename(file_url)
  month_name <- sub("_generator[0-9]+\\.xlsx$", "", source_name)
  year <- sub("^.*_generator([0-9]{4})\\.xlsx$", "\\1", source_name)

  month_number <- month_to_number[tolower(month_name)]

  new_file_name <- paste0(year, "_", month_number, "_", month_name, ".xlsx")

  local_path <- file.path(output_directory, new_file_name)

  # Check if file already exists
  if (file.exists(local_path)) {
    message("File already exists, skipping download: ", new_file_name)
    next
  }

  # Download to a sibling ".part" file first and rename only on success.
  # Otherwise a failed or interrupted transfer could leave a partial file at
  # `local_path`, which the existence check above would skip on later runs.
  temp_path <- paste0(local_path, ".part")

  # Download with error handling. download.file() may report failure either
  # by raising an error or by returning a non-zero status; treat both alike.
  download_ok <- tryCatch({
    status <- download.file(file_url, temp_path, mode = "wb", quiet = TRUE)
    if (!identical(as.integer(status), 0L)) {
      stop("download.file() returned status ", status, call. = FALSE)
    }
    TRUE
  }, error = function(e) {
    warning("Failed to download ", new_file_name, ": ", conditionMessage(e),
            call. = FALSE)
    FALSE
  })

  if (download_ok && !file.rename(temp_path, local_path)) {
    warning("Failed to move download into place: ", new_file_name,
            call. = FALSE)
    download_ok <- FALSE
  }

  if (download_ok) {
    message("Successfully downloaded: ", new_file_name)
  } else {
    # Drop any partial download so it cannot be mistaken for a real file.
    unlink(temp_path)
    n_failed <- n_failed + 1L
  }
}

# Only report success when every file is present; otherwise say how many
# downloads failed so the run is not mistaken for a complete one.
if (n_failed > 0L) {
  warning(n_failed, " of ", length(file_download_list),
          " file(s) failed to download; re-run the script to retry",
          call. = FALSE)
} else {
  message("[OK] Download complete")
}